Corporate Emissions (National)
EPA GHGP Summary Data
This code block processes EPA GHGRP summary data files stored in the
“Data/Raw/GHGP_data_summary_spreadsheets” directory. The data is sourced
from Excel files for each year from 2016 to 2023 that contain multiple
sheets (e.g., “Direct Point Emitters”, “Onshore Oil & Gas Prod.”,
etc.). Although data is available from 2010 onward, note that the
additional sheets—such as those for onshore oil & gas production,
gathering & boosting, transmission pipelines, LDC - Direct
Emissions, SF₆ from Electrical Equipment, and Suppliers—are not
available before 2016. Consequently, the code loops over the specified
years and sheets to combine the available data into a single list, with
each element corresponding to a specific sheet type.
# Define years of interest (note: data for 2010-2015 may be incomplete)
years <- 2016:2023
# List of sheet names to be read from each Excel file
sheets_to_read <- c(
"Direct Point Emitters",
"Onshore Oil & Gas Prod.",
"Gathering & Boosting",
"Transmission Pipelines",
"LDC - Direct Emissions",
"SF6 from Elec. Equip.",
"Suppliers"
)
# Set the directory where GHGRP summary spreadsheets are stored
local_data_dir <- here("Data", "Raw", "EPA_GHGRP_Data_Summary_Spreadsheets")
# Initialize an empty list to store combined data for each sheet type
GHGP_combined_data <- setNames(vector("list", length(sheets_to_read)), sheets_to_read)
# Loop through each year
for (year in years) {
file_name <- paste0("ghgp_data_", year, ".xlsx")
file_path <- file.path(local_data_dir, file_name)
# Check if file exists; if not, skip the year
if (!file.exists(file_path)) {
message("File not found for year ", year, ": ", file_path)
next
}
# Get the list of sheets available in the current file
available_sheets <- excel_sheets(file_path)
# Loop through each desired sheet
for (sheet in sheets_to_read) {
if (sheet %in% available_sheets) {
message("Reading ", sheet, " for year ", year)
# Attempt to read the sheet; skip the first 3 rows (adjust as needed)
sheet_data <- tryCatch({
read_excel(file_path, sheet = sheet, skip = 3, col_types = "text") %>%
clean_names() %>% # Standardize column names
mutate(year = year, .before = 1,
facility_name = str_replace_all(facility_name, "[[:punct:]]", ""),
facility_name = str_remove(facility_name, "[-/(].*")) %>%
mutate(across(everything(), toupper)) %>%
mutate(across(everything(), str_squish))
}, error = function(e) {
message("Failed to read sheet: ", sheet, " in year ", year, " — ", e$message)
NULL
})
# If the sheet was successfully read, combine it with any previous data
if (!is.null(sheet_data)) {
GHGP_combined_data[[sheet]] <- bind_rows(GHGP_combined_data[[sheet]], sheet_data)
}
} else {
message("Sheet not found: ", sheet, " in year ", year)
}
}
}
# Print the combined data for verification
head(GHGP_combined_data$`Direct Point Emitters`) %>% gt() %>% gt::fmt_auto()
| year |
facility_id |
frs_id |
facility_name |
city |
state |
zip_code |
address |
county |
latitude |
longitude |
primary_naics_code |
industry_type_subparts |
industry_type_sectors |
total_reported_direct_emissions |
co2_emissions_non_biogenic |
methane_ch4_emissions |
nitrous_oxide_n2o_emissions |
hfc_emissions |
pfc_emissions |
sf6_emissions |
nf3_emissions |
other_fully_fluorinated_ghg_emissions |
hfe_emissions |
very_short_lived_compounds_emissions |
other_gh_gs_metric_tons_co2e |
biogenic_co2_emissions_metric_tons |
stationary_combustion |
electricity_generation |
adipic_acid_production |
aluminum_production |
ammonia_manufacturing |
cement_production |
electronics_manufacture |
ferroalloy_production |
fluorinated_ghg_production |
glass_production |
hcfc_22_production_from_hfc_23_destruction |
hydrogen_production |
iron_and_steel_production |
lead_production |
lime_production |
magnesium_production |
miscellaneous_use_of_carbonates |
nitric_acid_production |
petroleum_and_natural_gas_systems_offshore_production |
petroleum_and_natural_gas_systems_processing |
petroleum_and_natural_gas_systems_transmission_compression |
petroleum_and_natural_gas_systems_underground_storage |
petroleum_and_natural_gas_systems_lng_storage |
petroleum_and_natural_gas_systems_lng_import_export |
petrochemical_production |
petroleum_refining |
phosphoric_acid_production |
pulp_and_paper_manufacturing |
silicon_carbide_production |
soda_ash_manufacturing |
titanium_dioxide_production |
underground_coal_mines |
zinc_production |
municipal_landfills |
industrial_wastewater_treatment |
manufacture_of_electric_transmission_and_distribution_equipment |
industrial_waste_landfills |
is_some_co2_collected_on_site_and_used_to_manufacture_other_products_and_therefore_not_emitted_from_the_affected_manufacturing_process_unit_s_as_reported_under_subpart_g_or_s |
is_some_co2_reported_as_emissions_from_the_affected_manufacturing_process_unit_s_under_subpart_aa_g_or_p_collected_and_transferred_off_site_or_injected_as_reported_under_subpart_pp |
does_the_facility_employ_continuous_emissions_monitoring |
| 2018 |
1004377.0 |
110043803578 |
121 REGIONAL DISPOSAL FACILITY |
MELISSA |
TX |
75454 |
3820 SAM RAYBURN HIGHWAY |
COLLIN COUNTY |
33.29857 |
-96.53586 |
562212 |
HH |
WASTE |
653854.0 |
NA |
653854.0 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
653854.0 |
NA |
NA |
NA |
N |
N |
N |
| 2018 |
1010040.0 |
110071159872 |
15185651518662 |
HAZARD |
KY |
40701 |
1021 TORI DRIVE |
PERRY COUNTY |
37.274127 |
-83.239034 |
212112 |
FF |
OTHER |
125981.75 |
NA |
125981.75 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
125981.75 |
NA |
NA |
NA |
NA |
NA |
N |
N |
N |
| 2018 |
1010085.0 |
110071159657 |
1519015 |
HAZARD |
KY |
41701 |
1845 S. KY HWY 15 |
PERRY COUNTY |
37.236617 |
-83.18126 |
212112 |
FF |
OTHER |
93918.75 |
NA |
93918.75 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
93918.75 |
NA |
NA |
NA |
NA |
NA |
N |
N |
N |
| 2018 |
1001155.0 |
110070942964 |
1500 SOUTH TIBBS LLC DBA AURORIUM INDIANAPOLIS LLC |
INDIANAPOLIS |
IN |
46242 |
1500 SOUTH TIBBS AVENUE |
MARION COUNTY |
39.73 |
-86.26 |
325199 |
C |
CHEMICALS |
72163.662 |
72064.6 |
41.25 |
57.812 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
4771.1 |
72163.662 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
N |
N |
N |
| 2018 |
1000112.0 |
110071159492 |
23RD AND 3RD |
BROOKLYN |
NY |
11232 |
730 3RD AVENUE |
KINGS |
40.663 |
-74.0 |
221112 |
C,D |
POWER PLANTS |
70705.288 |
70633.5 |
32.75 |
39.038 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
250.1 |
70455.188 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
N |
N |
N |
| 2018 |
1003742.0 |
110038508335 |
31ST STREET LANDFILL |
WESTCHESTER |
IL |
60154 |
11700 W 31ST ST |
COOK COUNTY |
41.834962 |
-87.916392 |
562212 |
C,HH |
WASTE |
31220.15 |
168.4 |
31051.75 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
168.4 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
31051.75 |
NA |
NA |
NA |
N |
N |
N |
head(GHGP_combined_data$Suppliers) %>% gt() %>% gt::fmt_auto()
| year |
facility_id |
frs_id |
facility_name |
city |
state |
zip_code |
address |
county |
latitude |
longitude |
primary_naics_code |
industry_type_subparts |
ghg_quantity_associated_with_coal_based_liquid_fuel_production |
ghg_quantity_associated_with_petroleum_products_produced |
ghg_quantity_associated_with_petroleum_products_imported |
ghg_quantity_associated_with_petroleum_products_exported |
ghg_quantity_associated_with_natural_gas_supply |
ghg_quantity_associated_with_natural_gas_liquids_supply |
ghg_quantity_associated_with_co2_supply |
| 2016 |
1012147 |
NA |
17Z GAS PLANT CHEVRON USA INC |
MCKITTRICK |
CA |
93251 |
22845 HIGHWAY 33 |
NA |
35.318187100000003 |
-119.6338842 |
211112 |
C,NN-FRAC,W-PROC |
NA |
NA |
NA |
NA |
NA |
162409.2 |
NA |
| 2016 |
1002574 |
110070834967 |
3M COMPANY |
ST PAUL |
MN |
55144 |
3M CENTER |
RAMSEY COUNTY |
45.032510000000002 |
-93.094190999999995 |
339999 |
OO |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| 2016 |
1004206 |
110000367567 |
3M COMPANY |
DECATUR |
AL |
35609 |
1400 STATE DOCKS RD. |
MORGAN COUNTY |
34.641666999999998 |
-87.038611000000003 |
325211 |
C,L,OO |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| 2016 |
1006665 |
110013886875 |
3M CORDOVA |
CORDOVA |
IL |
61242 |
22614 ROUTE 84 NORTH |
ROCK ISLAND COUNTY |
41.755000000000003 |
-90.284166999999997 |
325998 |
C,L,OO |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| 2016 |
1004836 |
110000423667 |
3M COTTAGE GROVE CENTER SITE |
COTTAGE GROVE |
MN |
55016 |
10746 INNOVATION RD |
WASHINGTON COUNTY |
44.789444000000003 |
-92.908332999999999 |
325998 |
C,L,N,OO |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
| 2016 |
1004761 |
110013812918 |
ACE ETHANOL LLC |
STANLEY |
WI |
54768 |
815 WEST MAPLE STREET |
CHIPPEWA COUNTY |
44.9589 |
-90.960800000000006 |
325193 |
C,PP |
NA |
NA |
NA |
NA |
NA |
NA |
CONFIDENTIAL |
#print(head(GHGP_combined_data$`Onshore Oil & Gas Prod.`))
#print(head(GHGP_combined_data$`Gathering & Boosting`))
#print(head(GHGP_combined_data$`Transmission Pipelines`))
#print(head(GHGP_combined_data$`LDC - Direct Emissions`))
#print(head(GHGP_combined_data$`SF6 from Elec. Equip.`))
NAICS Code Data
This section loads the NAICS Codes data, which provides industry
classification codes and titles. This information is essential for
contextualizing the sectors in which the facilities operate, supporting
further analysis. The NAICS codes file is obtained from the Census
website. This data will be merged with the EPA emissions data to assign
industry titles to each facility based on its NAICS code.
# Define local path using `here`
naics_local_path <- here("Data", "Raw", "NAICS_Codes", "6-digit_2022_Codes.xlsx")
# Read the file, skipping any third column (like notes or descriptions)
naics <- read_excel(
naics_local_path,
col_names = TRUE,
col_types = c('numeric', 'text', 'skip') # skip third column if present
)
# Select and clean only the relevant columns
naics <- naics %>%
select(`2022 NAICS Code`, `2022 NAICS Title`) %>%
slice(-1) # remove first row it's blank
# Create a manual patch table (for missing naics codes in the ghgp)
naics_patch <- tribble(
~`2022 NAICS Code`, ~`2022 NAICS Title`,
211112, "Natural Gas Liquid Extraction",
325188, "All Other Basic Inorganic Chemical Manufacturing",
325192, "Other Synthetic Organic Dye and Pigment Manufacturing",
333999, "All Other Miscellaneous General Purpose Machinery Manufacturing",
335222, "Household Refrigerator and Home Freezer Manufacturing",
336111, "Automobile Manufacturing",
441310, "Automotive Parts and Accessories Stores"
)
# Bind the patch table to the main NAICS data
naics <- bind_rows(naics, naics_patch) %>%
mutate(across(everything(), toupper)) %>%
mutate(`2022 NAICS Code` = as.numeric(`2022 NAICS Code`))
# Print
head(naics) %>% gt() %>% gt::fmt_auto()
| 2022 NAICS Code |
2022 NAICS Title |
| 111,110 |
SOYBEAN FARMING |
| 111,120 |
OILSEED (EXCEPT SOYBEAN) FARMING |
| 111,130 |
DRY PEA AND BEAN FARMING |
| 111,140 |
WHEAT FARMING |
| 111,150 |
CORN FARMING |
| 111,160 |
RICE FARMING |
GHGP Parent Company Data
This section loads the EPA GHGRP Parent Company Data file available
from the EPA.It contains information on the parent companies that own
(or partially own) facilities reporting GHG emissions, including the
percentage of ownership.Combining data across multiple years (2016-2023)
allows us to link facility-level emissions with ownership details,
enabling the attribution of emissions to corporate entities.
# Define path
parent_company_path <- here("Data", "Raw", "EPA_GHGRP_Data_Parent_Company", "ghgp_data_parent_company.xlsb")
# Confirmed sheet names (adjust if needed)
sheet_names <- c("2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023")
# Initialize progress bar
pb <- progress_bar$new(total = length(sheet_names), format = "Reading sheet :sheet [:bar] :percent")
# Read each sheet with error handling and progress update
parent_company_data_combined <- map(sheet_names, function(sheet) {
pb$tick(tokens = list(sheet = sheet))
tryCatch(
read_xlsb(parent_company_path, sheet = sheet),
error = function(e) {
message("Failed to read sheet: ", sheet)
NULL
}
)
})
# Combine all sheets in the list into one dataframe
parent_company_data_full <- bind_rows(parent_company_data_combined, .id = "year_index")
# Convert year column to character for merge later
parent_company_data <- parent_company_data_full %>%
clean_names() %>%
mutate(across(everything(), toupper)) %>%
mutate(facility_name = str_remove(facility_name, "[-/(].*"),
facility_name = str_replace_all(facility_name, "[[:punct:]]", ""),
parent_company_name = str_replace_all(parent_company_name, "[[:punct:]]", "")) %>%
mutate(across(everything(), str_squish)) %>%
mutate(Facility = as.character(ghgrp_facility_id),
Year = as.numeric(reporting_year),
parent_co_percent_ownership = as.numeric(parent_co_percent_ownership),
.keep="unused")
# Preview the first few rows of the combined NAICS codes data.
#glimpse(parent_company_data)
Emissions Data Wrangling
The emissions data are presented in several different sections of
EPA’s spreadsheet, so here we compile them all together. Emissions are
measured in metric tons of CO2 equivalent, or tCO₂e (see: https://www.theguardian.com/environment/2011/apr/27/co2e-global-warming-potential).
Direct Emitter Facilities
This section processes the EPA GHGRP emissions data for direct
emitter facilities. The data is sourced from multiple spreadsheets
(e.g., “Direct Point Emitters”, “Onshore Oil & Gas Prod.”, etc.)
contained within the GHGRP summary files. Each sheet contains different
emission metrics measured in metric tons of CO2 equivalent (tCO₂e). The
code begins by defining a mapping of sheet names to their corresponding
column names for emissions data, ensuring that the correct fields are
extracted from each file. It then initializes an empty data frame to
compile data across all sheets. For each sheet, the code selects key
variables such as facility ID, facility name, NAICS code, and the
specified emissions metric, converting the emissions values to numeric
format. After compiling the data, it aggregates entries for duplicate
facility IDs by summing their emissions. Next, the code merges this
aggregated data with the NAICS codes data—sourced from the Census—to
assign industry titles to each facility based on its NAICS code.
Finally, the data is split by year, and for each year the top 10 direct
emitters are displayed using the gt package to generate formatted
tables. This structured approach not only cleans and aggregates the
emissions data but also contextualizes it by industry, facilitating a
clear, year-by-year comparison of facility emissions.
# Correct list of emission field names with the accurate column names from your data frames
emissions_field_names <- list(
"Direct Point Emitters" = "total_reported_direct_emissions",
"Onshore Oil & Gas Prod." = "total_reported_emissions_from_onshore_oil_gas_production", # Corrected name
"Gathering & Boosting" = "total_reported_emissions_from_gathering_boosting", # Corrected name
"Transmission Pipelines" = "total_reported_direct_emissions_from_transmission_pipelines",
"LDC - Direct Emissions" = "total_reported_direct_emissions_from_local_distribution_companies",
"SF6 from Elec. Equip." = "total_reported_direct_emissions_from_electrical_equipment_use"
)
# Create an empty data frame to store emissions data compiled from direct emitters
fac_directemitters_data <- data.frame(
Facility = character(),
Name = character(),
`2022 NAICS Code` = character(),
`Emissions (tCO₂e)` = numeric(),
Year = character(),
stringsAsFactors = FALSE
)
# Compile emissions data across direct emitters for CO2 and NOₓ
for (key in names(emissions_field_names)) {
dump <- GHGP_combined_data[[key]] %>%
dplyr::select(facility_id, facility_name, primary_naics_code, year, all_of(emissions_field_names[[key]])) %>%
rename(
Facility = facility_id,
Name = facility_name,
`2022 NAICS Code` = primary_naics_code,
Year = year,
`Emissions (tCO₂e)` = all_of(emissions_field_names[[key]])
)
dump$`Emissions (tCO₂e)` <- as.numeric(dump$`Emissions (tCO₂e)`)
fac_directemitters_data <- bind_rows(fac_directemitters_data, dump)
}
# Aggregate duplicate facility IDs
fac_directemitters_data <- fac_directemitters_data %>%
group_by(Year, Facility, Name, `2022 NAICS Code`) %>%
summarise(`Emissions (tCO₂e)` = sum(`Emissions (tCO₂e)`, na.rm = TRUE), .groups = "drop")
# Join with NAICS data
fac_directemitters_data <- fac_directemitters_data %>%
mutate(`2022 NAICS Code` = as.numeric(`2022 NAICS Code`)) %>%
left_join(naics, by = "2022 NAICS Code")
fac_directemitters_data_table <- fac_directemitters_data %>%
select(-`2022 NAICS Code`, -Facility) %>%
rename(`NAICS Title` = `2022 NAICS Title`) %>%
arrange(desc(`Emissions (tCO₂e)`))
# Split the full data frame by year
fac_directemitters_by_year <- split(fac_directemitters_data_table, fac_directemitters_data_table$Year)
# For each year, create and render a GT table showing the top 10 direct emitter parent companies
lapply(fac_directemitters_by_year, function(df_year) {
current_year <- unique(df_year$Year)
df_year %>%
arrange(desc(`Emissions (tCO₂e)`)) %>%
slice_head(n = 10) %>%
gt() %>%
tab_header(
title = paste("EPA GHGRP Direct Emitters –", current_year),
subtitle = "Top 10 Parent Companies by Emissions (tCO₂e)"
) %>%
fmt_number(
columns = vars(`Emissions (tCO₂e)`),
decimals = 0,
use_seps = TRUE
) %>%
cols_width(
Name ~ px(300),
`NAICS Title` ~ px(300),
`Emissions (tCO₂e)` ~ px(200)
) %>%
cols_align(
align = "center",
columns = everything()
)
})
$
2016
|
EPA GHGRP Direct Emitters – 2016
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Year
|
Name
|
Emissions (tCO₂e)
|
NAICS Title
|
|
2016
|
HILCORP NORTH SLOPE BASIN 890GB FACILITY
|
3,274,432
|
NA
|
|
2016
|
SAN JUAN BASIN 580
|
2,759,591
|
NA
|
|
2016
|
SAN JOAQUIN BASIN GATHERING BOOSTING CHEVRON USA INC
|
2,750,199
|
NA
|
|
2016
|
COG OPERATING LLC 430 PERMIAN BASIN
|
2,443,823
|
NA
|
|
2016
|
EP ENERGY EP 220 GULF COAST BASIN GB
|
2,034,697
|
NA
|
|
2016
|
DCP MIDSTREAM 430 PERMIAN BASIN
|
1,928,560
|
NA
|
|
2016
|
ARKOMA BASIN BG
|
1,817,086
|
NA
|
|
2016
|
WILLIAMS GB FACILITY APPALACHIAN BASIN EASTERN OVERTHRUST AREA 160A
|
1,757,937
|
SUPPORT ACTIVITIES FOR OIL AND GAS OPERATIONS
|
|
2016
|
DCP MIDSTREAM 360 ANADARKO BASIN
|
1,740,508
|
NA
|
|
2016
|
CRI 395 WILLISTON
|
1,674,608
|
NA
|
$
2017
|
EPA GHGRP Direct Emitters – 2017
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Year
|
Name
|
Emissions (tCO₂e)
|
NAICS Title
|
|
2017
|
HILCORP NORTH SLOPE BASIN 890GB FACILITY
|
3,726,705
|
NA
|
|
2017
|
SAN JOAQUIN BASIN GATHERING BOOSTING CHEVRON USA INC
|
2,486,522
|
CRUDE PETROLEUM EXTRACTION
|
|
2017
|
SAN JUAN BASIN 580
|
2,224,914
|
NA
|
|
2017
|
DGC ARKOMA BG
|
1,875,182
|
NA
|
|
2017
|
WILLIAMS GB FACILITY APPALACHIAN BASIN EASTERN OVERTHRUST AREA 160A
|
1,869,197
|
SUPPORT ACTIVITIES FOR OIL AND GAS OPERATIONS
|
|
2017
|
DCP MIDSTREAM 430 PERMIAN BASIN
|
1,841,403
|
NATURAL GAS EXTRACTION
|
|
2017
|
430 PERMIAN BASIN
|
1,640,321
|
NA
|
|
2017
|
CRI 395 WILLISTON
|
1,625,650
|
NA
|
|
2017
|
DCP MIDSTREAM 360 ANADARKO BASIN
|
1,606,662
|
NATURAL GAS EXTRACTION
|
|
2017
|
WHITING OIL AND GAS CORPORATION 395 WILLISTON BASIN
|
1,504,611
|
NA
|
$
2018
|
EPA GHGRP Direct Emitters – 2018
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Year
|
Name
|
Emissions (tCO₂e)
|
NAICS Title
|
|
2018
|
JAMES H MILLER JR
|
18,429,639
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2018
|
SCHERER
|
16,695,176
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2018
|
MONROE
|
16,400,875
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2018
|
GIBSON
|
16,325,781
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2018
|
MARTIN LAKE
|
14,870,599
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2018
|
LABADIE
|
14,846,788
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2018
|
W A PARISH
|
14,620,769
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2018
|
GEN J M GAVIN
|
14,472,667
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2018
|
NAVAJO GENERATING STATION
|
13,960,706
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2018
|
BOWEN
|
13,437,167
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
$
2019
|
EPA GHGRP Direct Emitters – 2019
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Year
|
Name
|
Emissions (tCO₂e)
|
NAICS Title
|
|
2019
|
JAMES H MILLER JR
|
19,169,743
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2019
|
MONROE
|
15,144,724
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2019
|
COLSTRIP
|
14,277,559
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2019
|
LABADIE
|
14,180,190
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2019
|
SCHERER
|
13,764,419
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2019
|
GEN J M GAVIN
|
13,044,750
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2019
|
W A PARISH
|
12,862,472
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2019
|
PRAIRIE STATE GENERATING STATION
|
12,737,155
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2019
|
MARTIN LAKE
|
12,704,073
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2019
|
OAK GROVE
|
12,336,777
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
$
2020
|
EPA GHGRP Direct Emitters – 2020
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Year
|
Name
|
Emissions (tCO₂e)
|
NAICS Title
|
|
2020
|
JAMES H MILLER JR
|
17,232,898
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2020
|
LABADIE
|
15,710,653
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2020
|
GEN J M GAVIN
|
13,788,223
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2020
|
MARTIN LAKE
|
13,518,397
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2020
|
OAK GROVE
|
13,103,472
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2020
|
MONROE
|
13,059,155
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2020
|
PRAIRIE STATE GENERATING STATION
|
11,950,052
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2020
|
GIBSON
|
11,391,066
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2020
|
EXXONMOBIL BT SITE
|
11,308,233
|
PETROLEUM REFINERIES
|
|
2020
|
JIM BRIDGER
|
11,225,008
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
$
2021
|
EPA GHGRP Direct Emitters – 2021
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Year
|
Name
|
Emissions (tCO₂e)
|
NAICS Title
|
|
2021
|
JAMES H MILLER JR
|
20,998,639
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2021
|
LABADIE
|
15,760,177
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2021
|
MONROE
|
14,379,178
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2021
|
W A PARISH
|
13,911,354
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2021
|
MARTIN LAKE
|
13,515,092
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2021
|
GEN J M GAVIN
|
13,478,316
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2021
|
OAK GROVE
|
12,617,336
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2021
|
PRAIRIE STATE GENERATING STATION
|
12,496,789
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2021
|
EXXONMOBIL BT SITE
|
11,811,121
|
PETROLEUM REFINERIES
|
|
2021
|
JOHN E AMOS
|
11,528,677
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
$
2022
|
EPA GHGRP Direct Emitters – 2022
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Year
|
Name
|
Emissions (tCO₂e)
|
NAICS Title
|
|
2022
|
JAMES H MILLER JR
|
21,775,440
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2022
|
LABADIE
|
15,860,759
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2022
|
MONROE
|
14,908,126
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2022
|
MARTIN LAKE
|
13,330,423
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2022
|
OAK GROVE
|
12,697,798
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2022
|
EXXONMOBIL BT SITE
|
12,611,929
|
PETROLEUM REFINERIES
|
|
2022
|
W A PARISH
|
12,436,232
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2022
|
HARRISON POWER STATION
|
11,694,164
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2022
|
LARAMIE RIVER
|
11,524,663
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2022
|
GEN J M GAVIN
|
11,298,607
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
$
2023
|
EPA GHGRP Direct Emitters – 2023
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Year
|
Name
|
Emissions (tCO₂e)
|
NAICS Title
|
|
2023
|
JAMES H MILLER JR
|
16,558,381
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2023
|
LABADIE
|
15,388,715
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2023
|
GEN J M GAVIN
|
13,451,174
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2023
|
MARTIN LAKE
|
12,787,949
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2023
|
EXXONMOBIL BT SITE
|
12,693,547
|
PETROLEUM REFINERIES
|
|
2023
|
OAK GROVE
|
12,278,594
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2023
|
PRAIRIE STATE GENERATING STATION
|
11,456,295
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2023
|
MONROE
|
11,413,347
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2023
|
HARRISON POWER STATION
|
11,179,958
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
|
2023
|
LARAMIE RIVER
|
10,979,532
|
FOSSIL FUEL ELECTRIC POWER GENERATION
|
Supplier Facilities
This section processes the supplier facilities data from the EPA
GHGRP. It begins by converting various text-based emission fields—such
as those for coal-based liquid fuel production and petroleum
products—into numeric values, while treating any missing or confidential
values as zeros. The code then renames and selects the key columns
(facility ID, facility name, NAICS code, emissions, and year) to ensure
consistency with the final output format. Next, the supplier data is
merged with NAICS Codes to attach industry classification titles to each
facility. Finally, the data is organized by year, and for each year the
top 10 supplier facilities (sorted by emissions) are displayed using the
gt package to create interactive, well-formatted tables.
# Convert character columns to numeric for the necessary emission fields,
# treating NAs and confidential values as zeros.
Suppliers <- GHGP_combined_data$Suppliers %>%
mutate(
`Emissions (tCO₂e)` = coalesce(
as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_coal_based_liquid_fuel_production)), 0) +
coalesce(as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_petroleum_products_produced)), 0) +
coalesce(as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_petroleum_products_imported)), 0) +
coalesce(as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_petroleum_products_exported)), 0) +
coalesce(as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_natural_gas_supply)), 0) +
coalesce(as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_natural_gas_liquids_supply)), 0) +
coalesce(as.numeric(gsub("[^0-9.]", "", ghg_quantity_associated_with_co2_supply)), 0)
)
# Rename columns to align with the final output format for suppliers
fac_suppliers_data <- Suppliers %>%
mutate(
primary_naics_code = as.numeric(primary_naics_code)
) %>%
rename(
Facility = facility_id,
Name = facility_name,
`2022 NAICS Code` = primary_naics_code,
Year = year
) %>%
select(Facility, Name, `2022 NAICS Code`, `Emissions (tCO₂e)`, Year)
# Merge supplier data with NAICS codes to include industry titles
fac_suppliers_data <- fac_suppliers_data %>%
left_join(naics, by = "2022 NAICS Code")
# Prepare a presentation table by removing redundant columns and renaming for clarity
fac_suppliers_data_table <- fac_suppliers_data %>%
select(-`2022 NAICS Code`, -Facility) %>%
rename(`NAICS Title` = `2022 NAICS Title`) %>%
arrange(desc(`Emissions (tCO₂e)`))
# Split the data by year for per-year reporting
fac_suppliers_by_year <- split(fac_suppliers_data_table, fac_suppliers_data_table$Year)
# For each year, create and render a GT table showing the top 10 supplier parent companies
lapply(fac_suppliers_by_year, function(df_year) {
current_year <- unique(df_year$Year)
df_year %>%
arrange(desc(`Emissions (tCO₂e)`)) %>%
slice_head(n = 10) %>%
gt() %>%
tab_header(
title = paste("EPA GHGP Suppliers –", current_year),
subtitle = "Top 10 Parent Companies by Emissions (tCO₂e)"
) %>%
fmt_number(
columns = vars(`Emissions (tCO₂e)`),
decimals = 0,
use_seps = TRUE
) %>%
cols_width(
Name ~ px(300),
`NAICS Title` ~ px(300),
`Emissions (tCO₂e)` ~ px(200)
) %>%
cols_align(
align = "center",
columns = everything()
)
})
$
2016
|
EPA GHGP Suppliers – 2016
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Name
|
Emissions (tCO₂e)
|
Year
|
NAICS Title
|
|
MOTIVA ENTERPRISES LLC
|
85,411,041
|
2016
|
PETROLEUM REFINERIES
|
|
GARYVILLE REFINERY
|
83,342,023
|
2016
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT
|
77,118,287
|
2016
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BT SITE
|
73,764,091
|
2016
|
PETROLEUM REFINERIES
|
|
VALERO MARKETING AND SUPPLY COMPANY
|
68,788,211
|
2016
|
PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK
STATIONS AND TERMINALS)
|
|
GALVESTON BAY REFINERY
|
63,711,477
|
2016
|
PETROLEUM REFINERIES
|
|
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX
|
57,816,931
|
2016
|
PETROLEUM REFINERIES
|
|
CHEVRON MM PASCAGOULA
|
53,184,465
|
2016
|
PETROLEUM REFINERIES
|
|
TESORO REFINING MARKETING CARSON REFINERY
|
52,600,641
|
2016
|
PETROLEUM REFINERIES
|
|
BP WHITING BUSINESS UNIT
|
48,341,410
|
2016
|
PETROLEUM REFINERIES
|
$
2017
|
EPA GHGP Suppliers – 2017
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Name
|
Emissions (tCO₂e)
|
Year
|
NAICS Title
|
|
MOTIVA ENTERPRISES LLC
|
89,561,871
|
2017
|
PETROLEUM REFINERIES
|
|
GARYVILLE REFINERY
|
85,165,684
|
2017
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT
|
74,986,475
|
2017
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BT SITE
|
70,977,274
|
2017
|
PETROLEUM REFINERIES
|
|
GALVESTON BAY REFINERY
|
65,855,431
|
2017
|
PETROLEUM REFINERIES
|
|
VALERO MARKETING AND SUPPLY COMPANY
|
59,095,885
|
2017
|
PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK
STATIONS AND TERMINALS)
|
|
BP WHITING BUSINESS UNIT
|
55,259,607
|
2017
|
PETROLEUM REFINERIES
|
|
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX
|
54,640,332
|
2017
|
PETROLEUM REFINERIES
|
|
CHEVRON MM PASCAGOULA
|
54,539,051
|
2017
|
PETROLEUM REFINERIES
|
|
TESORO REFINING MARKETING CARSON REFINERY
|
51,979,688
|
2017
|
PETROLEUM REFINERIES
|
$
2018
|
EPA GHGP Suppliers – 2018
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Name
|
Emissions (tCO₂e)
|
Year
|
NAICS Title
|
|
MOTIVA ENTERPRISES LLC
|
90,855,626
|
2018
|
PETROLEUM REFINERIES
|
|
GARYVILLE REFINERY
|
87,937,165
|
2018
|
PETROLEUM REFINERIES
|
|
VALERO MARKETING AND SUPPLY COMPANY
|
83,328,500
|
2018
|
PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK
STATIONS AND TERMINALS)
|
|
GALVESTON BAY REFINERY
|
75,976,955
|
2018
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BT SITE
|
75,791,749
|
2018
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT
|
74,443,926
|
2018
|
PETROLEUM REFINERIES
|
|
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX
|
60,912,652
|
2018
|
PETROLEUM REFINERIES
|
|
BP WHITING BUSINESS UNIT
|
55,106,714
|
2018
|
PETROLEUM REFINERIES
|
|
PREMCOR REFINING GROUP INCORPORATED PORTARTHUR REFINERY
|
52,562,347
|
2018
|
PETROLEUM REFINERIES
|
|
CHEVRON MM PASCAGOULA
|
51,000,986
|
2018
|
PETROLEUM REFINERIES
|
$
2019
|
EPA GHGP Suppliers – 2019
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Name
|
Emissions (tCO₂e)
|
Year
|
NAICS Title
|
|
MOTIVA ENTERPRISES LLC
|
88,062,057
|
2019
|
PETROLEUM REFINERIES
|
|
GARYVILLE REFINERY
|
82,986,331
|
2019
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT
|
77,569,433
|
2019
|
PETROLEUM REFINERIES
|
|
GALVESTON BAY REFINERY
|
77,378,042
|
2019
|
PETROLEUM REFINERIES
|
|
VALERO MARKETING AND SUPPLY COMPANY
|
72,076,497
|
2019
|
PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK
STATIONS AND TERMINALS)
|
|
EXXONMOBIL BT SITE
|
68,729,170
|
2019
|
PETROLEUM REFINERIES
|
|
BP WHITING BUSINESS UNIT
|
58,054,768
|
2019
|
PETROLEUM REFINERIES
|
|
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX
|
56,010,912
|
2019
|
PETROLEUM REFINERIES
|
|
CHEVRON MM PASCAGOULA
|
52,178,591
|
2019
|
PETROLEUM REFINERIES
|
|
TESORO REFINING MARKETING CARSON REFINERY
|
50,628,067
|
2019
|
PETROLEUM REFINERIES
|
$
2020
|
EPA GHGP Suppliers – 2020
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Name
|
Emissions (tCO₂e)
|
Year
|
NAICS Title
|
|
MOTIVA ENTERPRISES LLC
|
84,684,302
|
2020
|
PETROLEUM REFINERIES
|
|
GARYVILLE REFINERY
|
84,389,430
|
2020
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BT SITE
|
76,148,788
|
2020
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT
|
69,760,075
|
2020
|
PETROLEUM REFINERIES
|
|
ENTERPRISE PRODUCTS OPERATING LLC
|
66,679,801
|
2020
|
MARINE CARGO HANDLING
|
|
GALVESTON BAY REFINERY
|
66,235,711
|
2020
|
PETROLEUM REFINERIES
|
|
VALERO MARKETING AND SUPPLY COMPANY
|
61,214,779
|
2020
|
PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK
STATIONS AND TERMINALS)
|
|
BP WHITING BUSINESS UNIT
|
57,236,188
|
2020
|
PETROLEUM REFINERIES
|
|
MONT BELVIEU FRACTIONATOR
|
48,876,668
|
2020
|
NATURAL GAS EXTRACTION
|
|
TRAFIGURA TRADING LLC
|
48,701,114
|
2020
|
WHOLESALE TRADE AGENTS AND BROKERS
|
$
2021
|
EPA GHGP Suppliers – 2021
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Name
|
Emissions (tCO₂e)
|
Year
|
NAICS Title
|
|
VALERO MARKETING AND SUPPLY COMPANY
|
96,018,305
|
2021
|
PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK
STATIONS AND TERMINALS)
|
|
GARYVILLE REFINERY
|
89,301,972
|
2021
|
PETROLEUM REFINERIES
|
|
MOTIVA ENTERPRISES LLC
|
88,602,296
|
2021
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT
|
78,710,667
|
2021
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BT SITE
|
72,620,394
|
2021
|
PETROLEUM REFINERIES
|
|
GALVESTON BAY REFINERY
|
67,327,296
|
2021
|
PETROLEUM REFINERIES
|
|
ENTERPRISE PRODUCTS OPERATING LLC
|
60,076,255
|
2021
|
MARINE CARGO HANDLING
|
|
BP WHITING BUSINESS UNIT
|
58,664,235
|
2021
|
PETROLEUM REFINERIES
|
|
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX
|
52,352,915
|
2021
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BEAUMONT REFINERY
|
52,351,018
|
2021
|
PETROLEUM REFINERIES
|
$
2022
|
EPA GHGP Suppliers – 2022
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Name
|
Emissions (tCO₂e)
|
Year
|
NAICS Title
|
|
VALERO MARKETING AND SUPPLY COMPANY
|
169,436,028
|
2022
|
PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK
STATIONS AND TERMINALS)
|
|
MOTIVA ENTERPRISES LLC
|
92,888,241
|
2022
|
PETROLEUM REFINERIES
|
|
GARYVILLE REFINERY
|
90,625,212
|
2022
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT
|
86,689,540
|
2022
|
PETROLEUM REFINERIES
|
|
GALVESTON BAY REFINERY
|
79,286,393
|
2022
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BT SITE
|
79,154,149
|
2022
|
PETROLEUM REFINERIES
|
|
ENTERPRISE PRODUCTS OPERATING LLC
|
64,534,515
|
2022
|
MARINE CARGO HANDLING
|
|
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX
|
63,421,358
|
2022
|
PETROLEUM REFINERIES
|
|
BP WHITING BUSINESS UNIT
|
62,426,213
|
2022
|
PETROLEUM REFINERIES
|
|
TRAFIGURA TRADING LLC
|
61,825,661
|
2022
|
WHOLESALE TRADE AGENTS AND BROKERS
|
$
2023
|
EPA GHGP Suppliers – 2023
|
|
Top 10 Parent Companies by Emissions (tCO₂e)
|
|
Name
|
Emissions (tCO₂e)
|
Year
|
NAICS Title
|
|
VALERO MARKETING AND SUPPLY COMPANY
|
238,380,354
|
2023
|
PETROLEUM AND PETROLEUM PRODUCTS MERCHANT WHOLESALERS (EXCEPT BULK
STATIONS AND TERMINALS)
|
|
GARYVILLE REFINERY
|
93,705,370
|
2023
|
PETROLEUM REFINERIES
|
|
MOTIVA ENTERPRISES LLC
|
91,983,868
|
2023
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BEAUMONT REFINERY
|
83,078,905
|
2023
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BT SITE
|
82,201,241
|
2023
|
PETROLEUM REFINERIES
|
|
EXXONMOBIL BATON ROUGE REFINERY AND CHEMICAL PLANT
|
80,977,604
|
2023
|
PETROLEUM REFINERIES
|
|
ENTERPRISE PRODUCTS OPERATING LLC
|
72,902,210
|
2023
|
MARINE CARGO HANDLING
|
|
GALVESTON BAY REFINERY
|
71,172,513
|
2023
|
PETROLEUM REFINERIES
|
|
BP WHITING BUSINESS UNIT
|
65,956,897
|
2023
|
PETROLEUM REFINERIES
|
|
CITGO PETROLEUM CORP LAKE CHARLES MANUFACTURING COMPLEX
|
64,197,374
|
2023
|
PETROLEUM REFINERIES
|
Calculating Corporate Emissions
In this section, we merge the emissions data from direct emitters and
suppliers with the parent company information. This allows us to
attribute each facility’s reported GHG emissions—measured in metric tons
of CO2 equivalent (tCO₂e)—to its owning corporate entities based on
their percentage ownership. The following code performs the necessary
data type conversions, left joins, and filtering to prepare the data for
further analysis.
# Convert relevant columns in parent_company_data to the appropriate types
parent_company_data <- parent_company_data %>%
mutate(
Facility = as.character(as.numeric(Facility)),
Year = as.numeric(Year)
)
# Merge direct emitter data with parent company information
parent_emitters <- fac_directemitters_data %>%
mutate(
Facility = as.character(as.numeric(Facility)),
Year = as.numeric(Year)
) %>%
left_join(parent_company_data, by = c("Facility", "Year")) %>%
filter(`Emissions (tCO₂e)` != 0)
# Merge supplier data with parent company information using a similar approach.
parent_suppliers <- fac_suppliers_data %>%
mutate(
Facility = as.character(as.numeric(Facility)),
Year = as.numeric(Year)
) %>%
left_join(parent_company_data, by = c("Facility", "Year")) %>%
filter(`Emissions (tCO₂e)` != 0)
# Display a sample of the merged data for verification.
#parent_emitters %>% glimpse()
#parent_suppliers%>% glimpse()
Next, we calculate each parent company’s responsibility for emissions
based on their percentage ownership. For each facility, the reported
emissions are multiplied by the company’s ownership percentage (divided
by 100) to determine the share of emissions attributed to that
company.
# For direct emitters:
epa_parent_company_emitters <- parent_emitters %>%
group_by(Year) %>%
mutate(`Parent Company Emissions (tCO₂e)` = as.numeric(`Emissions (tCO₂e)`) *
as.numeric(parent_co_percent_ownership) / 100) %>%
rename(`Parent Company` = parent_company_name)
# For supplier facilities:
epa_parent_company_suppliers <- parent_suppliers %>%
group_by(Year) %>%
mutate(`Parent Company Emissions (tCO₂e)` = as.numeric(`Emissions (tCO₂e)`) *
as.numeric(parent_co_percent_ownership) / 100) %>%
rename(`Parent Company` = parent_company_name)
write_csv(epa_parent_company_suppliers, here("Data", "Processed", "EPA_GHGP_Parent_Company_Suppliers.csv"))
write_csv(epa_parent_company_emitters, here("Data", "Processed", "EPA_GHGP_Parent_Company_Emitters.csv"))
Finally, we summarize the emissions data by parent company and year.
The summarized data includes the total emissions attributed to each
company and the number of facilities contributing to those emissions.
Formatted tables are then generated for each year using the gt package,
which creates interactive and publication-ready tables.
# Summarize emissions for direct emitters by parent company and year.
summarized_emitters <- epa_parent_company_emitters %>%
group_by(`Parent Company`, Year) %>%
summarise(
`Parent Company Emissions (tCO₂e)` = sum(as.numeric(`Parent Company Emissions (tCO₂e)`), na.rm = TRUE),
`# of Facilities` = n(),
.groups = "drop"
) %>%
arrange(desc(`Parent Company Emissions (tCO₂e)`)) %>%
ungroup()
# Summarize emissions for supplier facilities by parent company and year.
suppliers_summarized <- epa_parent_company_suppliers %>%
group_by(`Parent Company`, Year) %>%
summarise(
`Parent Company Emissions (tCO₂e)` = sum(as.numeric(`Parent Company Emissions (tCO₂e)`), na.rm = TRUE),
`# of Facilities` = n(),
.groups = "drop"
) %>%
arrange(desc(`Parent Company Emissions (tCO₂e)`)) %>%
ungroup()
# Define a function that creates a GT table for the top 10 companies per year.
# The function returns a list of GT tables, one per year.
print_parent_company_tables <- function(summary_df, title_prefix = "Emitters") {
# Extract unique years from the dataset
years <- sort(unique(summary_df$Year))
# Initialize an empty list to store GT tables
gt_tables <- list()
# Loop over each year
for (yr in years) {
# Filter data for the current year and select top 10 companies by emissions
gt_table <- summary_df %>%
filter(Year == yr) %>%
select(-Year) %>% # Remove Year column for display
slice_head(n = 10) %>% # Select top 10 companies
gt() %>%
fmt_number(
columns = vars(`Parent Company Emissions (tCO₂e)`),
decimals = 0,
use_seps = TRUE
) %>%
cols_label(
`Parent Company Emissions (tCO₂e)` = "Emissions (tCO₂e)",
`# of Facilities` = "# Facilities"
) %>%
cols_width(
`Parent Company` ~ px(300),
`Parent Company Emissions (tCO₂e)` ~ px(200),
`# of Facilities` ~ px(150)
) %>%
tab_header(
title = paste(title_prefix, "–", yr)
)
# Add the table to the list using the year as the name
gt_tables[[as.character(yr)]] <- gt_table
}
# Return the list of GT tables
return(gt_tables)
}
# Generate formatted GT tables for direct emitters and supplier emissions.
direct_emitter_tables <- print_parent_company_tables(summarized_emitters, title_prefix = "EPA GHGP Corporate Direct Emitters")
supplier_tables <- print_parent_company_tables(suppliers_summarized, title_prefix = "EPA GHGP Corporate Supplier Emissions")
# In an R Markdown document, simply returning the list will render the tables.
direct_emitter_tables
$
2016
|
EPA GHGP Corporate Direct Emitters – 2016
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
EXXONMOBIL CORP
|
8,063,542
|
37
|
|
BP AMERICA INC
|
7,027,144
|
12
|
|
CONOCOPHILLIPS
|
6,876,850
|
15
|
|
ANADARKO PETROLEUM CORP
|
5,366,493
|
16
|
|
WILLIAMS PARTNERS LP
|
5,361,810
|
13
|
|
ENERGY TRANSFER PARTNERS LP
|
4,925,638
|
13
|
|
CHESAPEAKE ENERGY CORP
|
3,972,768
|
13
|
|
CHEVRON CORP
|
3,815,182
|
15
|
|
EOG RESOURCES INC
|
3,557,717
|
19
|
|
SOUTHWESTERN ENERGY CO
|
3,014,378
|
4
|
$
2017
|
EPA GHGP Corporate Direct Emitters – 2017
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
EXXONMOBIL CORP
|
7,007,264
|
33
|
|
BP AMERICA INC
|
6,458,902
|
12
|
|
WILLIAMS PARTNERS LP
|
5,480,890
|
13
|
|
HILCORP ENERGY CO
|
4,998,568
|
13
|
|
ENERGY TRANSFER PARTNERS LP
|
4,935,847
|
13
|
|
DCP MIDSTREAM LP
|
4,842,979
|
9
|
|
CONOCOPHILLIPS
|
4,087,293
|
13
|
|
EOG RESOURCES INC
|
3,853,076
|
18
|
|
ANADARKO PETROLEUM CORP
|
3,781,616
|
11
|
|
CHEVRON CORP
|
3,444,193
|
11
|
$
2018
|
EPA GHGP Corporate Direct Emitters – 2018
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
VISTRA ENERGY CORP
|
114,272,492
|
51
|
|
SOUTHERN CO
|
99,309,274
|
36
|
|
DUKE ENERGY CORP
|
97,383,677
|
47
|
|
AMERICAN ELECTRIC POWER
|
68,513,351
|
28
|
|
BERKSHIRE HATHAWAY INC
|
53,971,373
|
49
|
|
XCEL ENERGY
|
46,923,088
|
36
|
|
NRG ENERGY INC
|
39,287,627
|
31
|
|
CALPINE CORP
|
39,269,420
|
47
|
|
EXXONMOBIL CORP
|
39,268,814
|
88
|
|
ENTERGY CORP
|
37,726,617
|
26
|
$
2019
|
EPA GHGP Corporate Direct Emitters – 2019
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
VISTRA ENERGY CORP
|
102,761,542
|
49
|
|
THE SOUTHERN CO
|
87,487,053
|
63
|
|
DUKE ENERGY CORP
|
85,958,619
|
47
|
|
AMERICAN ELECTRIC POWER CO INC
|
70,020,857
|
31
|
|
BERKSHIRE HATHAWAY INC
|
64,518,982
|
96
|
|
NEXTERA ENERGY INC
|
44,607,595
|
30
|
|
XCEL ENERGY
|
43,084,156
|
36
|
|
EXXON MOBIL CORP
|
42,750,562
|
91
|
|
VOLT PARENT LP
|
40,168,527
|
44
|
|
ENTERGY CORP
|
36,294,210
|
27
|
$
2020
|
EPA GHGP Corporate Direct Emitters – 2020
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
VISTRA CORP
|
91,782,979
|
40
|
|
THE SOUTHERN CO
|
75,724,039
|
58
|
|
DUKE ENERGY CORP
|
75,471,450
|
47
|
|
BERKSHIRE HATHAWAY INC
|
57,581,146
|
129
|
|
AMERICAN ELECTRIC POWER CO INC
|
54,179,231
|
30
|
|
VOLT PARENT LP
|
44,574,110
|
43
|
|
NEXTERA ENERGY INC
|
42,552,839
|
31
|
|
EXXON MOBIL CORP
|
41,492,248
|
89
|
|
DOMINION ENERGY INC
|
36,006,719
|
47
|
|
XCEL ENERGY INC
|
35,750,399
|
35
|
$
2021
|
EPA GHGP Corporate Direct Emitters – 2021
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
VISTRA CORP
|
95,510,483
|
44
|
|
THE SOUTHERN CO
|
82,154,166
|
58
|
|
DUKE ENERGY CORP
|
78,135,150
|
48
|
|
BERKSHIRE HATHAWAY INC
|
63,872,118
|
129
|
|
AMERICAN ELECTRIC POWER CO INC
|
62,351,437
|
28
|
|
EXXON MOBIL CORP
|
43,484,952
|
91
|
|
VOLT PARENT LP
|
42,828,661
|
44
|
|
NEXTERA ENERGY INC
|
41,395,422
|
30
|
|
XCEL ENERGY INC
|
38,629,183
|
35
|
|
ENTERGY CORP
|
37,166,686
|
32
|
$
2022
|
EPA GHGP Corporate Direct Emitters – 2022
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
VISTRA CORP
|
92,454,741
|
43
|
|
THE SOUTHERN CO
|
83,987,686
|
56
|
|
DUKE ENERGY CORP
|
78,479,645
|
49
|
|
AMERICAN ELECTRIC POWER CO INC
|
58,686,889
|
27
|
|
BERKSHIRE HATHAWAY INC
|
58,241,154
|
129
|
|
EXXON MOBIL CORP
|
42,244,322
|
86
|
|
NEXTERA ENERGY INC
|
41,639,555
|
29
|
|
ENTERGY CORP
|
40,219,776
|
31
|
|
XCEL ENERGY INC
|
37,521,758
|
36
|
|
CPN MANAGEMENT LP
|
33,550,720
|
28
|
$
2023
|
EPA GHGP Corporate Direct Emitters – 2023
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
VISTRA CORP
|
86,320,993
|
40
|
|
THE SOUTHERN CO
|
76,718,005
|
54
|
|
DUKE ENERGY CORP
|
72,776,123
|
49
|
|
BERKSHIRE HATHAWAY INC
|
53,055,963
|
128
|
|
AMERICAN ELECTRIC POWER CO INC
|
48,971,843
|
26
|
|
CPN MANAGEMENT LP
|
47,703,517
|
44
|
|
NEXTERA ENERGY INC
|
42,867,570
|
27
|
|
ENTERGY CORP
|
38,142,109
|
30
|
|
EXXON MOBIL CORP
|
37,882,422
|
59
|
|
XCEL ENERGY INC
|
36,005,568
|
36
|
supplier_tables
$
2016
|
EPA GHGP Corporate Supplier Emissions – 2016
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
PHILLIPS 66
|
336,693,357
|
19
|
|
VALERO ENERGY CORP
|
320,675,752
|
15
|
|
MARATHON PETROLEUM CO LP
|
274,434,705
|
8
|
|
EXXONMOBIL CORP
|
264,232,481
|
14
|
|
SHELL OIL CO
|
179,811,952
|
11
|
|
CHEVRON CORP
|
161,409,117
|
7
|
|
PDV HOLDING INC
|
129,223,074
|
5
|
|
TESORO CORP
|
118,882,739
|
7
|
|
ENTERPRISE PRODUCTS PARTNERS LP
|
113,600,350
|
15
|
|
BP AMERICA INC
|
112,266,235
|
8
|
$
2017
|
EPA GHGP Corporate Supplier Emissions – 2017
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
PHILLIPS 66
|
351,616,213
|
17
|
|
VALERO ENERGY CORP
|
331,428,491
|
15
|
|
MARATHON PETROLEUM CO LP
|
278,381,915
|
8
|
|
EXXONMOBIL CORP
|
266,631,258
|
14
|
|
SHELL OIL CO
|
215,892,381
|
11
|
|
CHEVRON CORP
|
175,521,491
|
7
|
|
BP AMERICA INC
|
141,130,289
|
8
|
|
TESORO CORP
|
128,749,113
|
8
|
|
PDV HOLDING INC
|
121,841,178
|
5
|
|
ENTERPRISE PRODUCTS PARTNERS LP
|
121,388,975
|
15
|
$
2018
|
EPA GHGP Corporate Supplier Emissions – 2018
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
PHILLIPS 66
|
374,397,807
|
17
|
|
VALERO ENERGY CORP
|
367,804,281
|
15
|
|
MARATHON PETROLEUM CO LP
|
314,928,553
|
9
|
|
EXXONMOBIL CORP
|
287,284,557
|
13
|
|
SHELL OIL CO
|
172,409,795
|
10
|
|
CHEVRON CORP
|
169,072,637
|
6
|
|
PDV HOLDING INC
|
131,545,877
|
5
|
|
BP AMERICA INC
|
126,697,011
|
6
|
|
ENTERPRISE PRODUCTS PARTNERS LP
|
124,207,040
|
16
|
|
TESORO CORP
|
110,382,394
|
6
|
$
2019
|
EPA GHGP Corporate Supplier Emissions – 2019
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
MARATHON PETROLEUM CORP
|
462,298,283
|
20
|
|
PHILLIPS 66
|
375,433,574
|
17
|
|
VALERO ENERGY CORP
|
341,421,586
|
15
|
|
EXXON MOBIL CORP
|
283,974,088
|
13
|
|
SHELL PETROLEUM INC
|
182,902,796
|
9
|
|
CHEVRON CORP
|
173,927,432
|
5
|
|
BP AMERICA INC
|
145,701,026
|
6
|
|
ENTERPRISE PRODUCTS PARTNERS LP
|
135,147,774
|
16
|
|
PDV AMERICA INC
|
117,506,952
|
5
|
|
KOCH INDUSTRIES INC
|
99,170,496
|
4
|
$
2020
|
EPA GHGP Corporate Supplier Emissions – 2020
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
MARATHON PETROLEUM CORP
|
401,574,105
|
21
|
|
PHILLIPS 66
|
324,103,187
|
17
|
|
VALERO ENERGY CORP
|
298,505,939
|
15
|
|
EXXON MOBIL CORP
|
282,255,946
|
13
|
|
ENTERPRISE PRODUCTS PARTNERS LP
|
168,952,523
|
18
|
|
SHELL PETROLEUM INC
|
149,092,739
|
9
|
|
CHEVRON CORP
|
135,388,031
|
5
|
|
BP AMERICA INC
|
133,602,244
|
5
|
|
ARAMCO SERVICES CO
|
102,916,368
|
2
|
|
PDV AMERICA INC
|
94,924,513
|
5
|
$
2021
|
EPA GHGP Corporate Supplier Emissions – 2021
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
MARATHON PETROLEUM CORP
|
422,682,631
|
18
|
|
VALERO ENERGY CORP
|
356,721,973
|
15
|
|
PHILLIPS 66
|
339,053,077
|
16
|
|
EXXON MOBIL CORP
|
298,748,974
|
12
|
|
ENTERPRISE PRODUCTS PARTNERS LP
|
167,386,818
|
16
|
|
CHEVRON CORP
|
158,393,471
|
5
|
|
ARAMCO SERVICES CO
|
108,739,236
|
2
|
|
BP AMERICA INC
|
107,686,993
|
4
|
|
PDV AMERICA INC
|
106,156,281
|
5
|
|
KOCH INDUSTRIES INC
|
103,765,062
|
4
|
$
2022
|
EPA GHGP Corporate Supplier Emissions – 2022
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
MARATHON PETROLEUM CORP
|
442,905,790
|
17
|
|
VALERO ENERGY CORP
|
436,980,733
|
15
|
|
EXXON MOBIL CORP
|
327,048,178
|
13
|
|
PHILLIPS 66
|
318,555,900
|
16
|
|
ENTERPRISE PRODUCTS PARTNERS LP
|
178,531,580
|
16
|
|
CHEVRON CORP
|
154,340,013
|
6
|
|
PDV AMERICA INC
|
124,001,333
|
5
|
|
PBF ENERGY INC
|
120,232,669
|
7
|
|
BP AMERICA INC
|
107,083,164
|
5
|
|
KOCH INDUSTRIES INC
|
107,012,557
|
4
|
$
2023
|
EPA GHGP Corporate Supplier Emissions – 2023
|
|
Parent Company
|
Emissions (tCO₂e)
|
# Facilities
|
|
VALERO ENERGY CORP
|
511,680,742
|
15
|
|
MARATHON PETROLEUM CORP
|
437,065,133
|
17
|
|
PHILLIPS 66
|
346,616,539
|
19
|
|
EXXON MOBIL CORP
|
341,759,108
|
10
|
|
ENTERPRISE PRODUCTS PARTNERS LP
|
196,807,286
|
17
|
|
CHEVRON CORP
|
164,470,509
|
5
|
|
BP AMERICA INC
|
142,190,500
|
5
|
|
PDV AMERICA INC
|
122,786,461
|
5
|
|
PBF ENERGY INC
|
107,787,144
|
7
|
|
KOCH INDUSTRIES INC
|
104,440,178
|
4
|
Visualizing Top Direct Emitters and Suppliers
This section defines a function that, for each year, filters the
summarized emissions data, selects the top 10 parent companies based on
their attributed emissions, and generates a horizontal bar chart using
ggplot2. The resulting plots offer a clear visual comparison of
corporate GHG emissions across different years for both direct emitters
and suppliers.
plot_top_emitters <- function(data, title_prefix = "Emitters") {
# Load the stringr package for text wrapping functionality
library(stringr)
# Define a custom palette of 10 distinct, saturated colors using RColorBrewer's "Paired" palette.
custom_colors <- RColorBrewer::brewer.pal(10, "Paired")
# Extract and sort the unique years present in the dataset
years <- sort(unique(data$Year))
# Loop through each year in the dataset
for (yr in years) {
# Filter data for the current year, sort by emissions in descending order,
# select the top 10 companies, and reorder the parent company factor by emissions.
df <- data %>%
filter(Year == yr) %>%
arrange(desc(`Parent Company Emissions (tCO₂e)`)) %>%
slice_head(n = 10) %>%
mutate(`Parent Company` = fct_reorder(`Parent Company`, `Parent Company Emissions (tCO₂e)`))
# Generate a horizontal bar chart using ggplot2:
# - geom_col() creates the bars with a fixed width.
# - coord_flip() rotates the chart for better readability.
# - scale_fill_manual() applies the custom color palette.
# - scale_x_discrete() wraps long labels for clarity.
# - scale_y_continuous() formats y-axis labels with commas.
# - labs() sets the title, subtitle, and axis labels.
# - theme_minimal() with additional theme adjustments ensures a clean presentation.
p <- ggplot(df, aes(
x = `Parent Company`,
y = `Parent Company Emissions (tCO₂e)`,
fill = `Parent Company`
)) +
geom_col(width = 0.6, show.legend = FALSE) +
coord_flip() +
scale_fill_manual(values = custom_colors) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 20)) +
scale_y_continuous(labels = scales::comma) +
labs(
title = paste(title_prefix, "-", yr),
subtitle = "Top 10 Parent Companies by Emissions",
x = "Parent Company",
y = "Emissions (tCO₂e)"
) +
theme_minimal(base_size = 11) +
theme(
plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
plot.subtitle = element_text(size = 11, hjust = 0.5),
axis.title.x = element_text(face = "bold", size = 11),
axis.title.y = element_text(face = "bold", size = 11),
axis.text.y = element_text(size = 10),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_line(color = "grey80"),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA)
)
# Print the plot for the current year
print(p)
}
}
# Example usage: Generate plots for both direct emitters and supplier emissions.
plot_top_emitters(summarized_emitters, title_prefix = "Direct Emitters")








plot_top_emitters(suppliers_summarized, title_prefix = "Supplier Emissions")








This function aggregates emission data by parent company across the
available time period (2016-2013), selects the top 10 companies based on
their total emissions, and generates a horizontal bar chart. The chart
visually displays the total emissions attributed to each parent company,
facilitating an academic-level comparison across the years.
# This function aggregates emissions by parent company across all years (2016–2023)
# for improved spacing between axis titles and the axis text.
plot_top_10_parent_companies <- function(data, title = "Top 10 Parent Companies (2016–2023)") {
# Aggregate data: group by Parent Company and sum their emissions
top10 <- data %>%
group_by(`Parent Company`) %>%
summarise(`Total Emissions (tCO₂e)` = sum(`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)) %>%
arrange(desc(`Total Emissions (tCO₂e)`)) %>%
slice_head(n = 10) %>%
mutate(`Parent Company` = fct_reorder(`Parent Company`, `Total Emissions (tCO₂e)`))
# Define a custom palette of 10 distinct, saturated colors using RColorBrewer's "Paired" palette.
custom_colors <- RColorBrewer::brewer.pal(10, "Paired")
# Create a horizontal bar chart using ggplot2 with refined aesthetics.
p <- ggplot(top10, aes(x = `Parent Company`, y = `Total Emissions (tCO₂e)`, fill = `Parent Company`)) +
geom_col(width = 0.6, show.legend = FALSE) + # Fixed bar width, no legend.
coord_flip() + # Horizontal bars.
scale_fill_manual(values = custom_colors) + # Apply the custom color palette.
scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = 25)) + # Wrap long labels.
scale_y_continuous(labels = scales::comma) + # Format y-axis labels with commas.
labs(
title = title,
x = "Parent Company",
y = "Total Emissions (tCO₂e)"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
axis.title.x = element_text(face = "bold", size = 12, margin = margin(t = 10)),
axis.title.y = element_text(face = "bold", size = 12, margin = margin(r = 10)),
axis.text.y = element_text(size = 10),
axis.text.x = element_text(size = 8),
panel.grid.major.y = element_blank(), # Remove horizontal grid lines.
panel.grid.minor = element_blank(),
panel.grid.major.x = element_line(color = "grey80"),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA),
plot.margin = margin(t = 30, r = 30, b = 30, l = 30)
)
print(p)
}
# Generate publication-quality plots using the aggregated data across all years.
plot_top_10_parent_companies(summarized_emitters, title = "Top 10 Direct Emitters (2016–2023)")

plot_top_10_parent_companies(suppliers_summarized, title = "Top 10 Supplier Emissions (2016–2023)")

Share of Emissions by Corporations
This section calculates the percentage share of total emissions (in
tCO₂e) accounted for by the top 50 parent companies for each year. The
analysis is performed separately for direct emitters and suppliers,
providing insights into the quantities of emissions among the largest
corporate entities. The code filters the summarized data for each year,
calculates the total emissions, selects the top 50 companies based on
emissions, and computes the percentage share of emissions attributed to
these companies. The results are displayed for each year, highlighting
the contribution of the top 50 companies to the total emissions in the
dataset.
# This code calculates the percentage share of total emissions (tCO₂e) accounted for by the top 50 parent companies for each year, separately for direct emitters and suppliers.
# Calculate shares for Direct Emitters
direct_shares <- summarized_emitters %>%
group_by(Year) %>%
nest() %>%
mutate(
Total_Emissions = map_dbl(data, ~ sum(.x$`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)),
Top10_Emissions = map_dbl(data, ~ sum(slice_max(.x, order_by = `Parent Company Emissions (tCO₂e)`, n = 10)$`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)),
Top50_Emissions = map_dbl(data, ~ sum(slice_max(.x, order_by = `Parent Company Emissions (tCO₂e)`, n = 50)$`Parent Company Emissions (tCO₂e)`, na.rm = TRUE))
) %>%
mutate(
`Top 10 Share (%)` = round(Top10_Emissions / Total_Emissions * 100, 1),
`Top 50 Share (%)` = round(Top50_Emissions / Total_Emissions * 100, 1)
) %>%
select(Year, `Top 10 Share (%)`, `Top 50 Share (%)`) %>%
ungroup() %>%
arrange(desc(Year))
# Display a GT table for Direct Emitters
direct_shares %>%
gt() %>%
tab_header(
title = "Direct Emitters: Percentage Share by Top Companies",
subtitle = "Percentage share of total emissions accounted for by the top 10 and top 50 companies"
) %>%
fmt_percent(
columns = c(`Top 10 Share (%)`, `Top 50 Share (%)`),
decimals = 1,
scale_values = FALSE
)
| Direct Emitters: Percentage Share by Top Companies |
| Percentage share of total emissions accounted for by the top 10 and top 50 companies |
| Year |
Top 10 Share (%) |
Top 50 Share (%) |
| 2023 |
21.3% |
49.1% |
| 2022 |
21.3% |
49.2% |
| 2021 |
21.9% |
50.1% |
| 2020 |
21.6% |
49.3% |
| 2019 |
22.0% |
49.7% |
| 2018 |
21.7% |
48.8% |
| 2017 |
25.9% |
63.3% |
| 2016 |
28.4% |
66.4% |
# Calculate shares for Suppliers
suppliers_shares <- suppliers_summarized %>%
group_by(Year) %>%
nest() %>%
mutate(
Total_Emissions = map_dbl(data, ~ sum(.x$`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)),
Top10_Emissions = map_dbl(data, ~ sum(slice_max(.x, order_by = `Parent Company Emissions (tCO₂e)`, n = 10)$`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)),
Top50_Emissions = map_dbl(data, ~ sum(slice_max(.x, order_by = `Parent Company Emissions (tCO₂e)`, n = 50)$`Parent Company Emissions (tCO₂e)`, na.rm = TRUE))
) %>%
mutate(
`Top 10 Share (%)` = round(Top10_Emissions / Total_Emissions * 100, 1),
`Top 50 Share (%)` = round(Top50_Emissions / Total_Emissions * 100, 1)
) %>%
select(Year, `Top 10 Share (%)`, `Top 50 Share (%)`) %>%
ungroup() %>%
arrange(desc(Year))
# Display a GT table for Suppliers
suppliers_shares %>%
gt() %>%
tab_header(
title = "Suppliers: Percentage Share by Top Companies",
subtitle = "Percentage share of total emissions accounted for by the top 10 and top 50 companies"
) %>%
fmt_percent(
columns = c(`Top 10 Share (%)`, `Top 50 Share (%)`),
decimals = 1,
scale_values = FALSE
)
| Suppliers: Percentage Share by Top Companies |
| Percentage share of total emissions accounted for by the top 10 and top 50 companies |
| Year |
Top 10 Share (%) |
Top 50 Share (%) |
| 2023 |
53.2% |
87.0% |
| 2022 |
51.3% |
86.5% |
| 2021 |
52.0% |
86.8% |
| 2020 |
51.6% |
86.7% |
| 2019 |
52.5% |
86.4% |
| 2018 |
50.0% |
85.1% |
| 2017 |
51.0% |
85.3% |
| 2016 |
49.3% |
83.7% |
The top 50 direct emitters are responsible for ~50% share of total
emissions. The top 10 suppliers account for ~85% of total emissions.
This analysis provides insights about the relative contributions of the
largest corporate entities to overall emissions.
Social Cost of Corporate Emissions
This section quantifies the Social Cost of Carbon (SCC) for each
corporate parent by applying a predetermined SCC value (in USD per
metric ton of CO2 equivalent) to the reported emissions. The SCC
represents the present value of the future damages caused by one
additional ton of CO2, encompassing impacts such as reduced agricultural
productivity, adverse health outcomes, and increased infrastructure
damage from extreme weather events. Seminal works like “The ‘Social Cost
of Carbon’ Made Simple” (Newbold et al., 2010) and subsequent studies
illustrate that the SCC captures the comprehensive economic cost of
climate change. In this analysis, each corporate parent’s emissions (in
tCO₂e) are multiplied by an EPA estimated SCC value—allowing for
adjustments based on evolving scientific and economic estimates.
Direct Emitter Facilities
# Aggregate direct emitter data across all years by summing emissions per parent company.
direct_total <- summarized_emitters %>%
group_by(`Parent Company`) %>%
summarise(Total_Emissions = sum(`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)) %>%
ungroup()
# Select the top 50 companies by total emissions.
top_direct <- direct_total %>%
arrange(desc(Total_Emissions)) %>%
slice_head(n = 50)
# Calculate SCC values for direct emitters for three scenarios without formatting.
direct_total_scc <- top_direct %>%
mutate(
SCC_51 = Total_Emissions * 51,
SCC_100 = Total_Emissions * 100,
SCC_150 = Total_Emissions * 150
)
# Compute a summary (total) row that aggregates all the values for the top 50 companies.
direct_total_summary <- direct_total_scc %>%
summarise(
`Parent Company` = "TOTAL",
Total_Emissions = sum(Total_Emissions, na.rm = TRUE),
SCC_51 = sum(SCC_51, na.rm = TRUE),
SCC_100 = sum(SCC_100, na.rm = TRUE),
SCC_150 = sum(SCC_150, na.rm = TRUE)
)
# Append the summary row to the dataset.
direct_total_scc_combined <- bind_rows(direct_total_scc, direct_total_summary)
# Format the SCC columns as dollar amounts and Total Emissions with commas.
direct_total_scc_combined %>%
mutate(
Total_Emissions = scales::comma(Total_Emissions, accuracy = 1),
SCC_51 = scales::dollar(SCC_51),
SCC_100 = scales::dollar(SCC_100),
SCC_150 = scales::dollar(SCC_150)
) %>%
gt() %>%
tab_header(
title = "Social Cost of Carbon: Top 50 Direct Emitters (2016–2023)",
subtitle = "Estimated Present Value of Future Damages"
) %>%
cols_label(
`Parent Company` = "Parent Company",
Total_Emissions = "Total Emissions (tCO₂e)",
SCC_51 = "SCC @ $51/tCO2",
SCC_100 = "SCC @ $100/tCO2",
SCC_150 = "SCC @ $150/tCO2"
) %>%
cols_align(
align = "center",
columns = c(Total_Emissions, SCC_51, SCC_100, SCC_150)
)
| Social Cost of Carbon: Top 50 Direct Emitters (2016–2023) |
| Estimated Present Value of Future Damages |
| Parent Company |
Total Emissions (tCO₂e) |
SCC @ $51/tCO2 |
SCC @ $100/tCO2 |
SCC @ $150/tCO2 |
| DUKE ENERGY CORP |
489,677,218 |
$24,973,538,123 |
$48,967,721,809 |
$73,451,582,713 |
| THE SOUTHERN CO |
406,070,950 |
$20,709,618,429 |
$40,607,094,958 |
$60,910,642,438 |
| VISTRA CORP |
366,069,196 |
$18,669,529,015 |
$36,606,919,637 |
$54,910,379,455 |
| BERKSHIRE HATHAWAY INC |
351,670,571 |
$17,935,199,126 |
$35,167,057,109 |
$52,750,585,663 |
| AMERICAN ELECTRIC POWER CO INC |
294,210,257 |
$15,004,723,084 |
$29,421,025,655 |
$44,131,538,483 |
| ENTERGY CORP |
224,667,438 |
$11,458,039,348 |
$22,466,743,820 |
$33,700,115,730 |
| VISTRA ENERGY CORP |
217,034,035 |
$11,068,735,761 |
$21,703,403,453 |
$32,555,105,180 |
| NEXTERA ENERGY INC |
213,062,981 |
$10,866,212,049 |
$21,306,298,136 |
$31,959,447,204 |
| EXXON MOBIL CORP |
210,389,296 |
$10,729,854,093 |
$21,038,929,595 |
$31,558,394,392 |
| DOMINION ENERGY INC |
198,480,431 |
$10,122,501,956 |
$19,848,043,051 |
$29,772,064,576 |
| NRG ENERGY INC |
184,117,703 |
$9,390,002,851 |
$18,411,770,296 |
$27,617,655,444 |
| DTE ENERGY CO |
173,657,152 |
$8,856,514,755 |
$17,365,715,206 |
$26,048,572,810 |
| PHILLIPS 66 |
167,659,786 |
$8,550,649,064 |
$16,765,978,557 |
$25,148,967,836 |
| MARATHON PETROLEUM CORP |
156,815,795 |
$7,997,605,530 |
$15,681,579,471 |
$23,522,369,206 |
| AMEREN CORP |
152,657,107 |
$7,785,512,456 |
$15,265,710,698 |
$22,898,566,047 |
| XCEL ENERGY INC |
147,906,907 |
$7,543,252,282 |
$14,790,690,750 |
$22,186,036,124 |
| CHEVRON CORP |
147,504,780 |
$7,522,743,776 |
$14,750,477,992 |
$22,125,716,988 |
| KOCH INDUSTRIES INC |
143,059,985 |
$7,296,059,216 |
$14,305,998,463 |
$21,458,997,695 |
| PPL CORP |
139,653,181 |
$7,122,312,244 |
$13,965,318,126 |
$20,947,977,189 |
| VALERO ENERGY CORP |
138,663,843 |
$7,071,855,980 |
$13,866,384,274 |
$20,799,576,411 |
| VOLT PARENT LP |
137,743,550 |
$7,024,921,073 |
$13,774,355,046 |
$20,661,532,569 |
| BASIN ELECTRIC POWER COOPERATIVE |
132,839,011 |
$6,774,789,551 |
$13,283,901,080 |
$19,925,851,620 |
| EVERGY INC |
127,274,294 |
$6,490,988,993 |
$12,727,429,398 |
$19,091,144,097 |
| US STEEL CORP |
119,877,189 |
$6,113,736,655 |
$11,987,718,931 |
$17,981,578,397 |
| CF INDUSTRIES HOLDINGS INC |
117,036,515 |
$5,968,862,241 |
$11,703,651,453 |
$17,555,477,179 |
| LIGHTSTONE GENERATION LLC |
114,336,069 |
$5,831,139,526 |
$11,433,606,914 |
$17,150,410,372 |
| CLEVELANDCLIFFS INC |
112,569,602 |
$5,741,049,678 |
$11,256,960,153 |
$16,885,440,229 |
| AES CORP |
112,007,335 |
$5,712,374,093 |
$11,200,733,516 |
$16,801,100,274 |
| ASSOCIATED ELECTRIC COOPERATIVE INC |
103,317,410 |
$5,269,187,908 |
$10,331,740,996 |
$15,497,611,494 |
| BP AMERICA INC |
102,704,583 |
$5,237,933,741 |
$10,270,458,315 |
$15,405,687,473 |
| CMS ENERGY CORP |
100,416,520 |
$5,121,242,499 |
$10,041,651,958 |
$15,062,477,937 |
| SOUTHERN CO |
99,501,018 |
$5,074,551,935 |
$9,950,101,832 |
$14,925,152,749 |
| XCEL ENERGY |
90,634,791 |
$4,622,374,363 |
$9,063,479,142 |
$13,595,218,713 |
| FIRSTENERGY CORP |
90,330,574 |
$4,606,859,264 |
$9,033,057,381 |
$13,549,586,072 |
| TALEN ENERGY CORP |
86,629,258 |
$4,418,092,155 |
$8,662,925,794 |
$12,994,388,691 |
| ALLIANT ENERGY CORP |
83,811,099 |
$4,274,366,045 |
$8,381,109,891 |
$12,571,664,837 |
| DOW INC |
83,362,296 |
$4,251,477,088 |
$8,336,229,584 |
$12,504,344,376 |
| CPN MANAGEMENT LP |
81,254,237 |
$4,143,966,108 |
$8,125,423,741 |
$12,188,135,611 |
| AIR PRODUCTS CHEMICALS INC |
76,684,794 |
$3,910,924,512 |
$7,668,479,434 |
$11,502,719,152 |
| WASTE MANAGEMENT INC |
76,249,543 |
$3,888,726,692 |
$7,624,954,298 |
$11,437,431,447 |
| ENERGY TRANSFER LP |
76,106,994 |
$3,881,456,693 |
$7,610,699,398 |
$11,416,049,098 |
| WEC ENERGY GROUP INC |
74,005,379 |
$3,774,274,326 |
$7,400,537,894 |
$11,100,806,841 |
| KINDER MORGAN INC |
73,224,500 |
$3,734,449,509 |
$7,322,450,017 |
$10,983,675,026 |
| PRAIRIE STATE ENERGY CAMPUS MANAGEMENT CO |
71,846,286 |
$3,664,160,570 |
$7,184,628,570 |
$10,776,942,854 |
| CPS ENERGY |
71,468,514 |
$3,644,894,222 |
$7,146,851,416 |
$10,720,277,123 |
| ARCHER DANIELS MIDLAND CO |
69,194,597 |
$3,528,924,423 |
$6,919,459,654 |
$10,379,189,481 |
| AMERICAN ELECTRIC POWER |
69,131,362 |
$3,525,699,466 |
$6,913,136,207 |
$10,369,704,311 |
| OCCIDENTAL PETROLEUM CORP |
67,146,363 |
$3,424,464,488 |
$6,714,636,251 |
$10,071,954,376 |
| ENTERPRISE PRODUCTS PARTNERS LP |
66,168,480 |
$3,374,592,486 |
$6,616,848,012 |
$9,925,272,018 |
| CONOCOPHILLIPS |
64,537,931 |
$3,291,434,465 |
$6,453,793,069 |
$9,680,689,604 |
| TOTAL |
7,274,438,704 |
$370,996,373,906 |
$727,443,870,403 |
$1,091,165,805,605 |
Supplier Facilities
# Aggregate supplier data across all years by summing emissions per parent company.
suppliers_total <- suppliers_summarized %>%
group_by(`Parent Company`) %>%
summarise(Total_Emissions = sum(`Parent Company Emissions (tCO₂e)`, na.rm = TRUE)) %>%
ungroup()
# Select the top 10 companies by total emissions.
top_suppliers <- suppliers_total %>%
arrange(desc(Total_Emissions)) %>%
slice_head(n = 10)
# Calculate SCC values for suppliers for the three scenarios.
suppliers_total_scc <- top_suppliers %>%
mutate(
SCC_51 = Total_Emissions * 51,
SCC_100 = Total_Emissions * 100,
SCC_150 = Total_Emissions * 150
)
# Compute a summary (total) row for the supplier data.
suppliers_total_summary <-suppliers_total_scc %>%
summarise(
`Parent Company` = "TOTAL",
Total_Emissions = sum(Total_Emissions, na.rm = TRUE),
SCC_51 = sum(SCC_51, na.rm = TRUE),
SCC_100 = sum(SCC_100, na.rm = TRUE),
SCC_150 = sum(SCC_150, na.rm = TRUE)
)
# Append the summary row to the supplier data.
suppliers_total_scc_combined <- bind_rows(suppliers_total_scc, suppliers_total_summary)
# Format the data and create a GT table for suppliers.
suppliers_total_scc_combined %>%
mutate(
Total_Emissions = scales::comma(Total_Emissions, accuracy = 1),
SCC_51 = scales::dollar(SCC_51),
SCC_100 = scales::dollar(SCC_100),
SCC_150 = scales::dollar(SCC_150)
) %>%
gt() %>%
tab_header(
title = "Social Cost of Carbon: Top 10 Suppliers (2016–2023)",
subtitle = "Estimated Present Value of Future Damages"
) %>%
cols_label(
`Parent Company` = "Parent Company",
Total_Emissions = "Total Emissions (tCO₂e)",
SCC_51 = "SCC @ $51/tCO2",
SCC_100 = "SCC @ $100/tCO2",
SCC_150 = "SCC @ $150/tCO2"
) %>%
cols_align(
align = "center",
columns = c(Total_Emissions, SCC_51, SCC_100, SCC_150)
)
| Social Cost of Carbon: Top 10 Suppliers (2016–2023) |
| Estimated Present Value of Future Damages |
| Parent Company |
Total Emissions (tCO₂e) |
SCC @ $51/tCO2 |
SCC @ $100/tCO2 |
SCC @ $150/tCO2 |
| VALERO ENERGY CORP |
2,965,219,498 |
$151,226,194,407 |
$296,521,949,818 |
$444,782,924,727 |
| PHILLIPS 66 |
2,766,469,653 |
$141,089,952,294 |
$276,646,965,281 |
$414,970,447,922 |
| MARATHON PETROLEUM CORP |
2,185,421,825 |
$111,456,513,064 |
$218,542,182,478 |
$327,813,273,718 |
| EXXON MOBIL CORP |
1,533,786,295 |
$78,223,101,020 |
$153,378,629,450 |
$230,067,944,175 |
| CHEVRON CORP |
1,292,522,699 |
$65,918,657,637 |
$129,252,269,877 |
$193,878,404,816 |
| ENTERPRISE PRODUCTS PARTNERS LP |
1,206,022,346 |
$61,507,139,669 |
$120,602,234,645 |
$180,903,351,968 |
| BP AMERICA INC |
1,016,357,463 |
$51,834,230,600 |
$101,635,746,274 |
$152,453,619,411 |
| MARATHON PETROLEUM CO LP |
867,745,173 |
$44,255,003,822 |
$86,774,517,298 |
$130,161,775,947 |
| EXXONMOBIL CORP |
818,148,296 |
$41,725,563,076 |
$81,814,829,561 |
$122,722,244,341 |
| KOCH INDUSTRIES INC |
784,259,265 |
$39,997,222,533 |
$78,425,926,536 |
$117,638,889,804 |
| TOTAL |
15,435,952,512 |
$787,233,578,122 |
$1,543,595,251,219 |
$2,315,392,876,829 |
National Trends in Corporate Emissions
US Government Emissions
This code filters the parent‐company emissions data for rows where
the parent company is “US GOVERNMENT”. It then groups the data by year,
calculates the total emissions and counts the number of facilities.
# Filter for US Government facilities, group by Year, and calculate totals.
government_corporate_emissions <- epa_parent_company_emitters %>%
filter(`Parent Company` == "US GOVERNMENT") %>%
group_by(Year) %>%
summarise(
`Total Emissions (tCO₂e)` = sum(`Emissions (tCO₂e)`, na.rm = TRUE),
`# of Facilities` = n(),
.groups = "drop"
) %>%
arrange(desc(Year))
# Create and print a GT table for US Government corporate emissions.
government_corporate_emissions %>%
gt() %>%
tab_header(
title = "US Government Corporate Emissions",
subtitle = "Total Emissions and Facility Count by Year (2016–2023)"
) %>%
fmt_number(
columns = c(`Total Emissions (tCO₂e)`),
decimals = 0,
use_seps = TRUE
) %>%
cols_label(
Year = "Year",
`Total Emissions (tCO₂e)` = "Total Emissions (tCO₂e)",
`# of Facilities` = "# of Facilities"
) %>%
cols_align(
align = "center",
columns = everything()
)
| US Government Corporate Emissions |
| Total Emissions and Facility Count by Year (2016–2023) |
| Year |
Total Emissions (tCO₂e) |
# of Facilities |
| 2023 |
41,576,701 |
85 |
| 2022 |
44,610,528 |
85 |
| 2021 |
44,074,471 |
84 |
| 2020 |
38,000,839 |
82 |
| 2019 |
58,397,351 |
86 |
| 2018 |
65,595,526 |
86 |
| 2017 |
221,780 |
2 |
| 2016 |
152,336 |
2 |
National Security Emissions
This snippet selects facilities whose “2022 NAICS Title” is “National
Security”. It then groups and summarizes the data by year and creates a
table showing the total emissions and facility counts.
# Filter for National Security facilities, group by Year, and calculate totals.
national_security_emissions <- epa_parent_company_emitters %>%
filter(`2022 NAICS Title` == "NATIONAL SECURITY") %>%
group_by(Year) %>%
summarise(`Total Emissions (tCO₂e)` = sum(`Emissions (tCO₂e)`, na.rm = TRUE),
`# of Facilities` = n(),
.groups = "drop"
) %>%
arrange(desc(Year))
# Create and print a GT table for National Security emissions.
national_security_emissions %>%
gt() %>%
tab_header(
title = "National Security Emissions",
subtitle = "Total Emissions and Facility Count by Year (2016–2023)"
) %>%
fmt_number(
columns = c(`Total Emissions (tCO₂e)`),
decimals = 0,
use_seps = TRUE
) %>%
cols_label(
Year = "Year",
`Total Emissions (tCO₂e)` = "Total Emissions (tCO₂e)",
`# of Facilities` = "# of Facilities"
) %>%
cols_align(
align = "center",
columns = everything()
)
| National Security Emissions |
| Total Emissions and Facility Count by Year (2016–2023) |
| Year |
Total Emissions (tCO₂e) |
# of Facilities |
| 2023 |
2,290,414 |
45 |
| 2022 |
2,407,864 |
45 |
| 2021 |
2,341,308 |
43 |
| 2020 |
2,308,764 |
43 |
| 2019 |
2,418,534 |
45 |
| 2018 |
2,369,812 |
44 |
University Emissions
This code filters for facilities categorized under “Colleges,
Universities, and Professional Schools.” It standardizes facility names
by converting to uppercase, removing text following symbols (such as
“-”, “/” or “(”), and trimming extra whitespace. It then removes
unwanted tokens (e.g., “POWER PLANT”, “MEDICAL SCHOOL”) from the names
and aggregates emissions by year and university.
# Filter for facilities in the "Colleges, Universities, and Professional Schools" sector.
university_emissions <- epa_parent_company_emitters %>%
filter(`2022 NAICS Title` == "COLLEGES, UNIVERSITIES, AND PROFESSIONAL SCHOOLS") %>%
group_by(Year, Name) %>%
# Standardize facility names: convert to uppercase, remove text after "-" or "(" or "/" and trim whitespace.
mutate(
Name = str_to_upper(Name),
Name = str_remove(Name, "[-/(].*"),
Name = str_squish(Name)
)
# Define a vector of unwanted phrases to remove from the names.
unwanted_terms <- c(
"MAIN PWR PLANT", "UTILITY PLANT", "MU POWER PLANTL HEAT PLANT",
"PHYSICAL PLANT BUILDING", "CENTRAL HEAT PLANT", "CENTRAL POWER PLANT",
"STEAM PLANT", "HEATING PLANT", "MU POWER PLANT", "CENTRAL PLANT",
"POWER PLANT", "MAIN CAMPUS", "HEALTH SCIENCES CAMPUS", "MEDICAL SCHOOL",
"CAMPUS", "THE PRESIDENT AND FELLOWS OF", "MEDICAL CENTER"
)
pattern_unwanted <- paste(unwanted_terms, collapse = "|")
# Clean the university names by removing unwanted tokens and re-aggregate.
university_emissions_clean <- university_emissions %>%
mutate(Name = str_remove_all(Name, regex(pattern_unwanted, ignore_case = TRUE))) %>%
group_by(Year, Name) %>%
summarise(
`Total Emissions (tCO₂e)` = sum(`Emissions (tCO₂e)`, na.rm = TRUE)
) %>%
arrange(desc(Year), desc(`Total Emissions (tCO₂e)`))
# Here we select the top 10 for each year.
top10_universities <- university_emissions_clean %>%
group_by(Year) %>%
slice_head(n = 10)
# Create and print a single GT table with all the top 5 per year.
top10_universities %>%
gt() %>%
tab_header(
title = "US University Emissions (2016–2023)",
subtitle = "Top 5 Universities by Total Emissions (tCO₂e) for Each Year"
) %>%
fmt_number(
columns = vars(`Total Emissions (tCO₂e)`),
decimals = 0,
use_seps = TRUE
) %>%
cols_label(
Year = "Year",
Name = "University",
`Total Emissions (tCO₂e)` = "Total Emissions (tCO₂e)"
) %>%
cols_align(
align = "center",
columns = everything()
)
| US University Emissions (2016–2023) |
| Top 5 Universities by Total Emissions (tCO₂e) for Each Year |
| University |
Total Emissions (tCO₂e) |
| 2018 |
| MICHIGAN STATE UNIVERSITY |
365,556 |
| PURDUE UNIVERSITYWADE UTILITY |
279,654 |
| UNIVERSITY OF MICHIGAN |
273,224 |
| THE UNIVERSITY OF NC CHAPEL HILL |
249,429 |
| IOWA STATE UNIVERSITY ISU |
243,515 |
| UNIVERSITY OF ILLINOIS |
228,199 |
| UNIVERSITY OF CALIFORNIA LOS ANGELES |
224,013 |
| UMASS |
198,897 |
| CORNELL UNIVERSITY ITHACA |
173,135 |
| UNIV OF IOWA |
167,153 |
| 2019 |
| MICHIGAN STATE UNIVERSITY |
374,534 |
| PURDUE UNIVERSITYWADE UTILITY |
309,999 |
| UNIVERSITY OF MICHIGAN |
255,316 |
| IOWA STATE UNIVERSITY ISU |
230,995 |
| THE UNIVERSITY OF NC CHAPEL HILL |
227,737 |
| UNIVERSITY OF CALIFORNIA LOS ANGELES |
225,529 |
| UMASS |
195,404 |
| UNIV OF IOWA |
176,909 |
| UNIVERSITY OF ILLINOIS |
173,244 |
| CORNELL UNIVERSITY ITHACA |
164,598 |
| 2020 |
| MICHIGAN STATE UNIVERSITY |
337,811 |
| PURDUE UNIVERSITYWADE UTILITY |
304,068 |
| UNIVERSITY OF MICHIGAN |
245,596 |
| UNIVERSITY OF ALASKA FAIRBANKS |
222,288 |
| UNIVERSITY OF CALIFORNIA LOS ANGELES |
211,121 |
| IOWA STATE UNIVERSITY ISU |
203,541 |
| THE UNIVERSITY OF NC CHAPEL HILL |
197,498 |
| UMASS |
182,196 |
| UNIVERSITY OF ILLINOIS |
178,522 |
| UNIV OF IOWA |
153,483 |
| 2021 |
| PURDUE UNIVERSITYWADE UTILITY |
352,284 |
| MICHIGAN STATE UNIVERSITY |
345,753 |
| UNIVERSITY OF MICHIGAN |
251,535 |
| UNIVERSITY OF CALIFORNIA LOS ANGELES |
211,595 |
| IOWA STATE UNIVERSITY ISU |
208,785 |
| THE UNIVERSITY OF NC CHAPEL HILL |
201,214 |
| UNIVERSITY OF ILLINOIS |
199,047 |
| UMASS |
193,494 |
| UNIVERSITY OF CINCINNATI |
169,645 |
| UNIVERSITY OF CALIFORNIA SAN DIEGO |
160,812 |
| 2022 |
| MICHIGAN STATE UNIVERSITY |
334,551 |
| PURDUE UNIVERSITYWADE UTILITY |
327,645 |
| UNIVERSITY OF MICHIGAN |
288,662 |
| UNIVERSITY OF CALIFORNIA LOS ANGELES |
212,276 |
| IOWA STATE UNIVERSITY ISU |
211,657 |
| UNIVERSITY OF ILLINOIS |
211,321 |
| UMASS |
190,473 |
| THE UNIVERSITY OF NC CHAPEL HILL |
179,491 |
| MIT CENTRAL |
168,494 |
| UNIVERSITY OF CALIFORNIA SAN DIEGO |
164,055 |
| 2023 |
| MICHIGAN STATE UNIVERSITY |
298,938 |
| UNIVERSITY OF MICHIGAN |
272,741 |
| PURDUE UNIVERSITYWADE UTILITY |
259,288 |
| UNIVERSITY OF ILLINOIS |
212,947 |
| UNIVERSITY OF CALIFORNIA LOS ANGELES |
204,921 |
| UMASS |
189,537 |
| UNIVERSITY OF CINCINNATI |
172,058 |
| THE UNIVERSITY OF NC CHAPEL HILL |
169,133 |
| MIT CENTRAL |
163,804 |
| UNIVERSITY OF CALIFORNIA SAN DIEGO |
161,283 |
# Calculate cumulative emissions for each university across all years.
top10_cumulative <- university_emissions_clean %>%
group_by(Name) %>%
summarise(Cumulative_Emissions = sum(`Total Emissions (tCO₂e)`, na.rm = TRUE), .groups = "drop") %>%
arrange(desc(Cumulative_Emissions)) %>%
slice_head(n = 10) %>%
pull(Name)
# Filter the complete dataset to include only these top 10 universities.
top10_universities <- university_emissions_clean %>%
filter(Name %in% top10_cumulative)
# Create a line plot to show the emissions trend over time for the top 10 universities.
ggplot(top10_universities, aes(x = Year, y = `Total Emissions (tCO₂e)`, color = Name, group = Name)) +
geom_line(size = 1.2) + # Draw lines for each university
geom_point(size = 3) + # Add points for each observation
scale_y_continuous(labels = comma) + # Format y-axis labels with commas
labs(
title = "Emissions Trend for Top 10 US Universities",
x = "Year",
y = "Total Emissions (tCO₂e)",
color = "University"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", hjust = 0.5),
legend.position = "right",
legend.text = element_text(size = 7),
)

Merging FRS and EPA Data for Census Analysis
Downloaded the FRS National Single CSV File from the EPA’s Facility
Registry Service on. This file contains information about facilities
regulated under multiple EPA environmental programs. Given its size, we
loaded and trimmed the dataset to retain only relevant columns (e.g.,
EPA Registry ID, program acronym, location data, facility name, sector).
This script loads the EPA FRS data, cleans it by converting key
identifiers to the correct type, and joins it with the EPA parent
company emissions data. Next, it aggregates the emissions at the census
block level. Later, you can merge this aggregated emissions dataset with
Census data (for example, median household income or demographic data)
using shared geographic identifiers (like census tract or block
group).
# ------------------------------------------------------------------------------
# Facility Registry Service (FRS) National Dataset
# Source: https://www.epa.gov/frs/epa-frs-facilities-state-single-file-csv-download
# Downloaded full U.S. file (FRS_NATIONAL_SINGLE.CSV)
# ------------------------------------------------------------------------------
# File size: ~2 GB — cleaned and trimmed to reduce memory load
# ------------------------------------------------------------------------------
# Load dataset — change this to your local filepath
#frs <- read_csv("DATA DOWNLOAD LOCATION HERE")
# Define keywords corresponding to the systems you need
#keywords <- c("E-GGRT", "EGRID", "CAMDBS", "AFS", "AIR", "TRI", "ICIS-AIR", "EMISSIONS", "GREENHOUSE GAS")
#pattern <- paste(keywords, collapse = "|")
#frs_cleaned <- frs %>%
# filter(grepl(pattern, INTEREST_TYPES, ignore.case = TRUE)) %>%
# select(REGISTRY_ID,
# FRS_FACILITY_DETAIL_REPORT_URL,
# LATITUDE83,
# LONGITUDE83,
# LOCATION_ADDRESS,
# CENSUS_BLOCK_CODE,
# CITY_NAME,
# FIPS_CODE,
# STATE_CODE,
# POSTAL_CODE,
# COUNTY_NAME,
# SIC_CODES,
# NAICS_CODES,
# HUC_CODE,
# TRIBAL_LAND_CODE,
# PGM_SYS_ACRNMS,
# CONGRESSIONAL_DIST_NUM) %>%
# clean_names()
#write.csv(frs_cleaned,
#here("Data", "Raw", "EPA_Facility_Registry_Service_Data", "FRS_NATIONAL.csv"),
#row.names = FALSE)
# Load the EPA FRS Query data
FRS_ID <- read_csv(here("Data", "Raw", "EPA_Facility_Registry_Service_Data", "FRS_NATIONAL.csv"))
# Convert the REGISTRY_ID column to character
FRS_ID <- FRS_ID %>%
mutate(registry_id = as.character(registry_id))
frs_epa_parent_company_emitters <- epa_parent_company_emitters %>%
left_join(FRS_ID, by = c("frs_id_facility" = "registry_id")) %>%
filter(!is.na(frs_id_facility)) %>%
select(-all_of("frs_id_facility")) %>%
clean_names() %>%
select(year, facility, name, emissions_t_co2e, census_block_code, facility_city, facility_zip, facility_state,
facility_county, parent_company, parent_company_emissions_t_co2e, longitude=longitude83, latitude=latitude83,
frs_facility_detail_report_url)
# Calculate total emissions per census block
emissions_by_census_block <- frs_epa_parent_company_emitters %>%
group_by(census_block_code, facility_county, facility_state, year) %>%
summarise(
total_emissions_t_co2e = sum(emissions_t_co2e, na.rm = TRUE),
facility_count = n()
)
write.csv(emissions_by_census_block,
here::here("Data", "Processed", "EPA_GHGP_Emissions_by_Census_Block.csv"),
row.names = FALSE)
Social Cost of Corporate Emissions
This section quantifies the Social Cost of Carbon (SCC) for each corporate parent by applying a predetermined SCC value (in USD per metric ton of CO2 equivalent) to the reported emissions. The SCC represents the present value of the future damages caused by one additional ton of CO2, encompassing impacts such as reduced agricultural productivity, adverse health outcomes, and increased infrastructure damage from extreme weather events. Seminal works like “The ‘Social Cost of Carbon’ Made Simple” (Newbold et al., 2010) and subsequent studies illustrate that the SCC captures the comprehensive economic cost of climate change. In this analysis, each corporate parent’s emissions (in tCO₂e) are multiplied by an EPA estimated SCC value—allowing for adjustments based on evolving scientific and economic estimates.
Direct Emitter Facilities
Supplier Facilities